rm(list=ls())
setwd("/scratch/cds2083/")

# Libraries

library(foreign)
library(BayesTree)
library(KRLS)
library(arm)
library(e1071)
library(dummies)
library(SuperLearner)

# Functions

SL.KRLS <- function(Y, X, newX, family,...){
	if(family$family=="gaussian"){
		fit.krls <- krls(X=X, y=Y)
		pred <- predict.krls(fit.krls, newdata=as.matrix(newX))$fit
		fit <- list(object=fit.krls)
	}
	if(family$family=="binomial"){
		fit.krls <- krls(X=X, y=Y)
		pred <- predict.krls(fit.krls, newdata=as.matrix(newX))$fit
		fit <- list(object=fit.krls)
	}
	out <- list(pred=pred, fit=fit)
	class(out$fit) <- c("SL.KRLS")
	return(out)
}

predict.SL.KRLS <- function(object, newdata, ...){
	pred <- predict(object, 
				newdata=newdata)$fit
	return(pred)
}

SL.bart <- function (Y, X, newX, family, ntree = 300, sigdf = 3, 
 				sigquant = 0.9, k = 2, power = 2, base = 0.95, 
 				binaryOffset = 0, ndpost = 1000, nskip = 100, ...) 
{
    if (family$family == "gaussian") {
        fitBart <- bart(x.train = X, y.train = Y, x.test = newX, 
            ntree = ntree, sigdf = sigdf, sigquant = sigquant, 
            k = k, power = power, base = base, binaryOffset = binaryOffset, 
            ndpost = ndpost, nskip = nskip, verbose = FALSE)
        pred <- fitBart$yhat.test.mean
    }
    if (family$family == "binomial") {
        fitBart <- bart(x.train = X, y.train = as.factor(Y), 
            x.test = newX, ntree = ntree, sigdf = sigdf, sigquant = sigquant, 
            k = k, power = power, base = base, binaryOffset = binaryOffset, 
            ndpost = ndpost, nskip = nskip, verbose = FALSE)
        pred <- pnorm(apply(fitBart$yhat.test, 2, mean))
    }
    fit <- list(object = fitBart)
    out <- list(pred = pred, fit = fit)
    class(out$fit) <- c("SL.bart")
    return(out)
}



# Data

coldatsc.all <- read.dta("COLOMBIA_STEP9_Interventions_REDO.dta")

for(impup in 5:6){
coldatsc <- subset(coldatsc.all, m_impute==impup)
coldatsc$p8_sex <- as.numeric(as.character(coldatsc$p8_sex)=="Femenino")
coldatsc$p104_minor <- as.numeric(as.character(coldatsc$p104_minor)=="S\xed")
mpiodummy.all <- dummy(coldatsc$mpio)
mpiodummy <- mpiodummy.all[,-match("mpioBOGOTA", colnames(mpiodummy.all))]
colnames(mpiodummy) <- sub(" ","_",colnames(mpiodummy))
coldat <- cbind(coldatsc, mpiodummy)

# X vars

mpio_FE <- colnames(mpiodummy)

controls_reint_ix <- c("index_reint_prog_dissatis",
					"index_reint_state",
					"index_reint_prog_exposure")

controls_conf_ix <- c("index_con_time",
					"index_con_rank_mid_commander",
					"index_con_rank_top_commander",
					"index_con_role_combatant",
					"index_con_unit_cohesion",
					"index_con_unit_discipline",
					"index_con_unit_hierarchy",
					"index_con_unit_motivation") 

controls_demob_ix <- c("index_demob_coerced",
					"index_demob_type") 

controls_join_ix <- c("index_join_ed",
					"index_join_fam",
					"index_join_forced",
					"index_join_griev",
					"index_join_ideo",
					"index_join_material",
					"index_join_network",
					"index_join_psych",
					"index_join_security",
					"index_join_welfare")

controls_gen <- c("p8_sex",
					"p11_race_REC1",
					"p9_age",
					"p104_minor",
					"p143_disabled_REC1",
					"p25_grp_demob_name_AUC",
					"p122_risk_all",
					"p121_time_all",
					"p19_ageenter_group1")

X.names <-c(controls_reint_ix,
			controls_conf_ix,
			controls_demob_ix,
			controls_join_ix,
			controls_gen,
			mpio_FE)

# Int vars

# Original vars

int_vars <- c(	"p136_emp_REC3",
				"p145_atrisk_REC2",
				"p111_gov_promises_1year_REC1",
				"index_reint_psych_neg",
				"p150_know_excom_REC1b",
				"p66_sup1_talk_REC1")

# Binary ints
int_vars_bin <- c(	"int_emp",
					"int_secure",
					"int_confident",
					"int_upbeat",
					"int_excompeers",
					"int_commander")

# Check to be sure no missing data on relevant variables:

as.matrix(apply(coldat[,c("recid_scale", X.names, int_vars, int_vars_bin)],
			2,
			function(x)sum(is.na(x))))

# Intervention pscores
for(intvarup in 1:length(int_vars)){
#intvarup <- 1
	X <- coldat[,c(X.names, int_vars[-intvarup])]
	Y <- coldat[,int_vars_bin[intvarup]]

	nu.admis <- min(min(c(mean(Y), 1-mean(Y))), .5)
	SL.svm.admis <- function(...,nu=nu.admis){
		SL.svm(...,nu=nu)
	}

	print(paste("int-", intvarup, " ", sep=""))
	date()
	fitInterventions.pscore <- SuperLearner(	Y=Y,
									X=X,
						SL.library = c(	"SL.glm", 
										"SL.KRLS", 
										"SL.bart",
										"SL.bayesglm",
										"SL.svm.admis"
										),
						family=binomial(),
						method="method.NNLS",
						verbose=TRUE,
						cvControl=list(V=10))
	date()
	save.image(paste("col-interv-pscore-REDO-", intvarup,"-","imp",impup, ".RData", sep=""))
}
}


############
# Add the propensity score predictions to the dataset:
############

rm(list=ls())
#setwd("~/Dropbox/COLOMBIA_PRIVATE/ANALYSIS/")
setwd("/scratch/cds2083/")
for(impup in 5:6){
intvarup <- 1
load(paste("col-interv-pscore-REDO-", intvarup,"-","imp",impup, ".RData", sep=""))
coldat.pscore <- coldat
coldat.pscore[,paste("p_",int_vars_bin[intvarup], sep="")] <- fitInterventions.pscore$SL.predict

rm(list=setdiff(ls(), c("impup", "intvarup","coldat.pscore")))
for(intvarup in 2:6){
	load(paste("col-interv-pscore-REDO-", intvarup,"-","imp",impup, ".RData", sep=""))
	coldat.pscore[,paste("p_",int_vars_bin[intvarup], sep="")] <- fitInterventions.pscore$SL.predict	
}

names.to.change <- names(coldat.pscore)[substr(names(coldat.pscore), 1, 1)=="_"]
new.name <- substring(names.to.change,first=2, last=20)
names(coldat.pscore)[substr(names(coldat.pscore), 1, 1)=="_"] <- new.name

write.csv(coldat.pscore,
			file=paste("COLOMBIA_STEP9_PSCORE_IMP",impup,".csv",sep=""),
			row.names=F)
}
############
# Estimate effects
############
library(foreign)
library(survey)
library(xtable)

rm(list=ls())
#setwd("~/Dropbox/COLOMBIA_PRIVATE/ANALYSIS/")
setwd("/scratch/cds2083/")
for(impup in 5:6){
# Data
coldat <- read.csv(paste("COLOMBIA_STEP9_PSCORE_IMP",
							impup,".csv", sep=""))

# For the one non-participant respondent
coldat$wgt_main[is.na(coldat$wgt_main)] <- 1

# Hypothetical interventions
int.vars <- names(coldat)[substr(names(coldat), 1, 4)=="int_"]
fitList <- matrix(NA, ncol=2, nrow=length(int.vars))
colnames(fitList) <- c("b","se")
rownames(fitList) <- int.vars

for(i in 1:length(int.vars)){
	#i <- 1

	# Construct D

	Aup <- int.vars[i]
	# To deal with the truncated variable names

	g.hat 		<- coldat[,paste("p_",Aup, sep="")]      
	I.A 		<- coldat[,Aup]       

	Y <- coldat[,"recid_scale"]             

	D.ipw <- ((I.A/g.hat)-1)*Y

	coldat[,paste("Dipw_",Aup, sep="")] <- D.ipw
}


colsvy <- svydesign(	ids = ~svypsu,
						strata = ~svystrat,
						weights=~wgt_main,
						data=coldat,
						nest=T)

for(i in 1:length(int.vars)){

	# Compute effect estimates
	Aup <- int.vars[i]

	formipw <- as.formula(paste("~Dipw_",Aup, sep=""))
	psi.ipw <- svymean(formipw, colsvy, na.rm=T)

	fitList[i,] <- c(psi.ipw[1],sqrt(attributes(psi.ipw)$var))
}
write.csv(fitList, file=paste("int-out-imp-", impup,".csv", sep=""))
}

